/**
 * WebUtil.java 
 * Copyright © 2015-3015, 杭州泊享网络科技有限公司
 * 
 * @author Unknown
 * @create 2015年1月8日
 */
package com.pshare.util;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;


/**
 * web工具集
 */
public class WebUtil {
	
    public static final Pattern SCRIPT_PATTERN = Pattern.compile("<[\\s]*?script[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?script[\\s]*?>", Pattern.CASE_INSENSITIVE);
    public static final Pattern STYLE_PATTERN  = Pattern.compile("<[\\s]*?style[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?style[\\s]*?>", Pattern.CASE_INSENSITIVE);
    public static final Pattern HTML_PATTERN   = Pattern.compile("<[^>]+>", Pattern.CASE_INSENSITIVE);
    public static final Pattern HTML1_PATTERN  = Pattern.compile("<[^>]+", Pattern.CASE_INSENSITIVE);
    
    /***
     * 将html转换为文本 去掉标签
     * 
     * @param inputString
     * @return
     */
    public static String html2Text(String inputString) {

        if (StringUtils.isBlank(inputString)) {
            return inputString;
        }

        String htmlStr = inputString;
        String textStr = "";
        Matcher m_script;
        Matcher m_style;
        Matcher m_html;
        Matcher m_html1;
        try {
            m_script = SCRIPT_PATTERN.matcher(htmlStr);
            htmlStr = m_script.replaceAll(""); // 过滤script标签
            m_style = STYLE_PATTERN.matcher(htmlStr);
            htmlStr = m_style.replaceAll(""); // 过滤style标签
            m_html = HTML_PATTERN.matcher(htmlStr);
            htmlStr = m_html.replaceAll(""); // 过滤html标签
            m_html1 = HTML1_PATTERN.matcher(htmlStr);
            htmlStr = m_html1.replaceAll(""); // 过滤html标签
            textStr = htmlStr;
        } catch (Exception e) {
        }
        return textStr;// 返回文本字符串
    }
}
